Between 2016 and 2022 there were major shifts in the majorities in the US Congress:
in 2016, both chambers were held by Republicans
in 2018, the Democrats gained a majority in the House of Representatives
in 2020, the Democrats held the House and also gained the Senate
One might expect that the Congresses elected in 2016 and 2020 will vary in their policies. However, it is also interesting to focus on the period between 2018 and 2020, when the two chambers had different majorities and needed to cooperate.
We will focus on the question of whether and how the different majorities had an impact on the policies that have been passed by Congress.
We scraped our data from: https://data.gov/developers/apis/index.html
# Load the scraped bill data set directly from the project's GitHub repository.
df <- read_csv(
  file = "https://raw.githubusercontent.com/juka19/tad_assignment3/main/data/data_11_28.csv"
)
## New names:
## Rows: 920 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): subjects, summary, policy_area dbl (8): ...1, ...2, ...3, Unnamed: 0, bill
## number, cosponsor_D_perc, cospo... date (2): latest_action, date
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
## • `...1` -> `...2`
## • `...2` -> `...3`
# Preview the first five rows of the bill data.
df %>% head(n = 5)
## # A tibble: 5 × 13
## ...1 ...2 ...3 Unnamed…¹ bill …² subje…³ summary polic…⁴ latest_a…⁵ cospo…⁶
## <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <date> <dbl>
## 1 1 1 0 0 4996 "{'leg… "Bankr… Financ… 2021-01-12 0.6
## 2 2 2 1 1 8906 "{'leg… "Lifes… Health 2021-01-05 0
## 3 3 3 2 2 8810 "{'leg… "Natio… Emerge… 2021-01-05 1
## 4 4 4 3 3 8611 "{'leg… "Desig… Govern… 2021-01-05 0.5
## 5 5 5 4 4 8354 "{'leg… "Servi… Civil … 2021-01-05 0.714
## # … with 3 more variables: cosponsor_R_perc <dbl>, date <date>, session <dbl>,
## # and abbreviated variable names ¹`Unnamed: 0`, ²`bill number`, ³subjects,
## # ⁴policy_area, ⁵latest_action, ⁶cosponsor_D_perc
# Label each bill by the party that dominates its cosponsors:
#   "Democrat"   - more than 0.66 of the cosponsors are Democrats
#   "Republican" - more than 0.66 of the cosponsors are Republicans
#   "Both"       - neither party passes that threshold
# A bill whose deciding share is missing gets a missing (NA) label.
df <- df %>%
  mutate(
    party = if_else(
      cosponsor_D_perc > 0.66,
      "Democrat",
      if_else(cosponsor_R_perc > 0.66, "Republican", "Both")
    )
  )
# Distribution of the Democratic cosponsor share across bills: a histogram
# scaled to density with a kernel density estimate drawn on top.
ggplot(df, aes(x = cosponsor_D_perc)) +
  # after_stat(density) replaces the `..density..` dot-dot notation, which was
  # deprecated in ggplot2 3.4.0.
  geom_histogram(aes(y = after_stat(density)), colour = "black", fill = "white") +
  geom_density(alpha = .1, fill = "blue") +
  labs(title = "Density of bill cosponsor party",
       x = "Cosponsor party composition", y = "Density",
       caption = "Numbers represent proportion of cosponsors from Democratic party,
so 0.0 represents bills that were fully Republican and 1.0 represents
bills that were fully Democrat.") +
  theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Build the main corpus from the bill summaries. The `summary` column is
# renamed to `text`, which is the column corpus() reads the documents from;
# the other columns stay attached to the corpus (e.g. `corp$session`).
df_corp <- rename(df, text = summary)
corp <- corpus(df_corp, text_field = "text")
## Warning: NA is replaced by empty string
# Document-feature matrix of the bill summaries after cleaning: drop
# punctuation, numbers, symbols and English stopwords, lemmatise, stem, and
# finally remove a custom list of frequent legislative boilerplate terms.
dfmat <- corp %>%
  tokens(remove_punct = TRUE, remove_numbers = TRUE, remove_symbols = TRUE) %>%
  # `pattern` is spelled out in full; the previous `patter =` only worked
  # through partial argument matching.
  tokens_remove(pattern = stopwords("en")) %>%
  tokens_replace(pattern = lexicon::hash_lemmas$token,
                 replacement = lexicon::hash_lemmas$lemma) %>%
  tokens_wordstem() %>%
  # This removal runs after stemming, which is why the list contains stemmed
  # forms such as "provid" and "requir".
  tokens_remove(c("sec", "bill", "act", "section", "funds", "shall", "must",
                  "use", "author", "fund", "provid", "program", "requir",
                  "divis", "titl", "appropri", "specifi")) %>%
  dfm()
# Comparison word cloud of the bill summaries, one group per congressional
# session. The corpus is tokenised first and the matrix is then filtered and
# grouped with dfm_remove()/dfm_group(): calling dfm() on a corpus and using
# its `remove`/`groups` arguments is deprecated since quanteda 3.0.
dfmatCon <- corp %>%
  tokens(remove_numbers = TRUE, remove_punct = TRUE) %>%
  dfm() %>%
  dfm_remove(stopwords("english")) %>%
  dfm_group(groups = corp$session) %>%
  dfm_remove(c("sec", "bill", "act", "section", "funds", "shall", "must",
               "used")) %>%
  # keep only terms that occur at least three times overall
  dfm_trim(min_termfreq = 3)
textplot_wordcloud(dfmatCon, comparison = TRUE, max_words = 300,
                   color = c("blue", "red"))
# Word cloud for the 115th Congress, comparing bills by their party label.
dfmat_115 <- dfm_subset(dfmat, session == 115)
corp_115 <- df %>%
  filter(session == 115) %>%
  rename(text = summary) %>%
  corpus()
# Tokenise first, then filter and group the matrix: calling dfm() on a corpus
# and using its `remove`/`groups` arguments is deprecated since quanteda 3.0.
modelpart15 <- corp_115 %>%
  tokens(remove_numbers = TRUE, remove_punct = TRUE) %>%
  dfm() %>%
  dfm_remove(stopwords("english")) %>%
  dfm_group(groups = corp_115$party) %>%
  dfm_remove(c("sec", "bill", "act", "section", "funds", "shall", "must",
               "used")) %>%
  dfm_trim(min_termfreq = 3)
# NOTE(review): the three colours are presumably matched to the party groups
# in sorted order (Both, Democrat, Republican) - confirm.
mp15 <- textplot_wordcloud(modelpart15, comparison = TRUE, max_words = 300,
                           color = c("green", "blue", "red"))
# Word cloud for the 116th Congress, comparing bills by their party label.
corp_116 <- df %>%
  filter(session == 116) %>%
  rename(text = summary) %>%
  corpus()
# Tokenise first, then filter and group the matrix: calling dfm() on a corpus
# and using its `remove`/`groups` arguments is deprecated since quanteda 3.0.
modelpart16 <- corp_116 %>%
  tokens(remove_numbers = TRUE, remove_punct = TRUE) %>%
  dfm() %>%
  dfm_remove(stopwords("english")) %>%
  dfm_group(groups = corp_116$party) %>%
  dfm_remove(c("sec", "bill", "act", "section", "funds", "shall", "must",
               "used")) %>%
  dfm_trim(min_termfreq = 3)
# NOTE(review): the three colours are presumably matched to the party groups
# in sorted order (Both, Democrat, Republican) - confirm.
mp16 <- textplot_wordcloud(modelpart16, comparison = TRUE, max_words = 300,
                           color = c("green", "blue", "red"))
# Corpus built directly from the raw summary strings of every bill.
corp2 <- corpus(df$summary)
## Warning: NA is replaced by empty string
# Lightly cleaned document-feature matrix: punctuation and English stopwords
# removed, terms occurring fewer than five times dropped.
dfmat2 <- corp2 %>%
  tokens(remove_punct = TRUE) %>%
  # `pattern` is spelled out in full; the previous `patter =` only worked
  # through partial argument matching.
  tokens_remove(pattern = stopwords("en")) %>%
  dfm() %>%
  dfm_trim(min_termfreq = 5)
# Project the documents with UMAP and store the first two coordinates on `df`.
# NOTE(review): indexing the result as a matrix assumes umap() returns the
# coordinates directly (as uwot::umap does) - confirm which package is loaded.
embeddings <- umap(as.matrix(dfmat2))
df$x <- embeddings[, 1]
df$y <- embeddings[, 2]
# Scatter plot of the two embedding coordinates, one point per bill, filled by
# party label; shown once as a static plot and once as an interactive widget.
colordict <- c("Democrat" = "blue", "Republican" = "red", "Both" = "yellow")
p <- ggplot(data = df, mapping = aes(x = x, y = y, fill = party)) +
  geom_point(shape = 21, size = 0.5, color = "grey") +
  scale_fill_manual(values = colordict) +
  theme_bw()
p
ggplotly(p)
# Keep only bills whose cosponsors all belong to a single party: `party_full`
# is "Dem" or "Rep" for those bills and NA otherwise, and NA rows are dropped.
df1 <- df %>%
  mutate(party_full = ifelse(cosponsor_D_perc == 1.0, "Dem",
                             ifelse(cosponsor_R_perc == 1.0, "Rep", NA))) %>%
  drop_na(party_full)
# Same light cleaning as for the full data set: punctuation and English
# stopwords removed, terms occurring fewer than five times dropped.
corp3 <- corpus(df1$summary)
dfmat3 <- corp3 %>%
  tokens(remove_punct = TRUE) %>%
  # `pattern` is spelled out in full; the previous `patter =` only worked
  # through partial argument matching.
  tokens_remove(pattern = stopwords("en")) %>%
  dfm() %>%
  dfm_trim(min_termfreq = 5)
# Project the single-party bills with UMAP and store the first two coordinates.
# NOTE(review): indexing the result as a matrix assumes umap() returns the
# coordinates directly (as uwot::umap does) - confirm which package is loaded.
embeddings2 <- umap(as.matrix(dfmat3))
df1$x <- embeddings2[, 1]
df1$y <- embeddings2[, 2]
# Scatter plot of the embedding for the single-party bills in `df1`, filled by
# the `party` label; shown once as a static plot and once as an interactive
# widget.
colordict2 <- c("Democrat" = "blue", "Republican" = "red")
j <- ggplot(data = df1, mapping = aes(x = x, y = y, fill = party)) +
  geom_point(shape = 21, size = 0.5, color = "grey") +
  scale_fill_manual(values = colordict2) +
  theme_bw()
j
ggplotly(j)
## Sentiment analysis
# Load the bill data that additionally carries the sentiment scores.
summary_sentiment <- read_csv(
  file = "https://raw.githubusercontent.com/juka19/tad_assignment3/main/data/data_w_vader.csv"
)
## New names:
## Rows: 920 Columns: 18
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): subjects, summary, policy_area dbl (13): ...1, Unnamed: 0, ...3, ...4,
## Unnamed: 0.1, bill number, cosponso... date (2): latest_action, date
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
## • `...1` -> `...3`
## • `...2` -> `...4`
# Label each bill by the party that dominates its cosponsors: "Democrat" or
# "Republican" when that party's cosponsor share exceeds 0.66, "Both" when
# neither does. A bill whose deciding share is missing gets an NA label.
summary_sentiment <- summary_sentiment %>%
  mutate(
    party = if_else(
      cosponsor_D_perc > 0.66,
      "Democrat",
      if_else(cosponsor_R_perc > 0.66, "Republican", "Both")
    )
  )
# Mean `compound` score per party label and date, reshaped to one column per
# party label. Only the single-party columns are kept.
wide_sentiment <- summary_sentiment %>%
  group_by(party, date) %>%
  # .groups = "drop" returns an ungrouped result explicitly instead of relying
  # on (and being told about) summarise()'s default regrouping.
  summarise(score = mean(compound), .groups = "drop") %>%
  pivot_wider(names_from = party, values_from = score) %>%
  # any_of() drops the "Both" and "NA" columns when present and does not fail
  # if the data happens to contain no mixed or unlabelled bills.
  select(-any_of(c("Both", "NA")))
# One row per calendar day from 2017-01-01 to 2022-12-31, so that days without
# any bill still appear (with missing scores) after the join.
days <- data.frame(
  date = seq(as.Date("2017-01-01"), as.Date("2022-12-31"), by = "day")
)
# The join key is stated explicitly so the join cannot silently pick up other
# shared column names. The result is then reshaped to long format with one row
# per day and party.
daily_sentiment <- days %>%
  left_join(wide_sentiment, by = "date") %>%
  pivot_longer(cols = -date, names_to = "party", values_to = "score")
# Daily mean sentiment score per party with a loess trend line, rendered as an
# interactive plot.
p4 <- ggplot(daily_sentiment, aes(x = date, y = score, colour = party)) +
  # x and y are inherited from ggplot(), so they are not repeated here
  geom_point(size = 1) +
  theme_minimal() +
  geom_smooth(method = "loess", se = FALSE) +
  # Colours are bound to party names rather than to the order in which the
  # parties happen to be sorted.
  scale_color_manual(values = c("Democrat" = "blue", "Republican" = "red"))
ggplotly(p4)
## `geom_smooth()` using formula = 'y ~ x'